Parameters

# Run configuration.
# assetName      - value bound to the asset_name filter of the SQL query.
# numBootstraps  - number of random windows drawn by the bootstrap loop.
# windowSizeMins - length of each window, in minutes.
assetName <- "Bitcoin"
numBootstraps <- 5000
windowSizeMins <- 180

# Fix the RNG state so the sampled windows are reproducible between runs.
set.seed(235987)

Feature definition

import(db)
import(util)

# Load the rows of table `trn` for `assetName` (bound to $1) together with a
# rolling regression slope of `close` against time, computed per asset over
# the current row and up to 10 preceding rows (ordered by ts).
#
# The epoch is cast to DOUBLE PRECISION rather than REAL: a 4-byte float has
# a 24-bit significand, so epoch seconds around 1.6e9 would be rounded to
# multiples of 128 s, quantising the x-axis of the regression and distorting
# the slope.
#
# The window-function column has no alias; the rest of the script reads it
# under the name `regr_slope`.
df <- dbGetQuery(db$getPool(), '
SELECT
  ts
, asset_name
, target
, close
, regr_slope(close, extract(epoch FROM ts)::DOUBLE PRECISION)
  OVER (PARTITION BY asset_id ORDER BY ts ASC ROWS 10 PRECEDING)
FROM trn
WHERE asset_name = $1
;',
  params = list(assetName))

# Convert the query result to a data.table by reference.
setDT(df)

Select bootstrap samples and calculate correlations

# Empty accumulators for the bootstrap results: the end time of every sampled
# window and the three per-window correlations against `target`.
t_end <- NULL
corr_ts <- NULL
corr_close <- NULL
corr_regr_slope <- NULL

# Progress bar sized to the number of bootstrap draws; tick(0) is called
# before the loop without advancing the counter.
pb <- util$pbar("Bootstrap sampling", numBootstraps)
pb$tick(0)
## <progress_bar>
##   Public:
##     clone: function (deep = FALSE) 
##     finished: FALSE
##     initialize: function (format = "[:bar] :percent", total = 100, width = getOption("width") - 
##     message: function (msg, set_width = TRUE) 
##     terminate: function () 
##     tick: function (len = 1, tokens = list()) 
##     update: function (ratio, tokens = list()) 
##   Private:
##     callback: function (self) 
##     chars: list
##     clear: FALSE
##     clear_line: function (width) 
##     complete: FALSE
##     current: 0
##     cursor_to_start: function () 
##     first: FALSE
##     format: Bootstrap sampling [:bar] :current/:total (:percent) :el ...
##     has_token: TRUE TRUE FALSE TRUE TRUE TRUE FALSE FALSE TRUE FALSE TRUE
##     last_draw: 
##     message_class: NULL
##     progress_message: function (..., domain = NULL, appendLF = TRUE) 
##     ratio: function () 
##     render: function (tokens) 
##     show_after: 0
##     spin: function () 
##     start: 2021-11-27 23:54:53
##     supported: FALSE
##     total: 5000
##     toupdate: TRUE
##     width: 110
# Bootstrap: draw `numBootstraps` window end times at random from the observed
# timestamps and, for the `windowSizeMins`-minute window ending at each one,
# record the correlation of `target` with time, `close` and `regr_slope`.

# Loop invariants, computed once.
allTs <- df[, ts]
windowLen <- as.difftime(windowSizeMins, units = "mins")

# Allocate the result vectors up front instead of growing them with append()
# on every iteration. t_end is seeded from a timestamp so it keeps the
# date-time class of `ts`.
t_end <- rep(allTs[1L], numBootstraps)
corr_ts <- rep(NA_real_, numBootstraps)
corr_close <- rep(NA_real_, numBootstraps)
corr_regr_slope <- rep(NA_real_, numBootstraps)

# seq_len() gives an empty loop for numBootstraps == 0, where 1:0 would
# iterate over c(1, 0).
for (i in seq_len(numBootstraps)) {
  rangeEnd <- sample(allTs, 1)
  rangeStart <- rangeEnd - windowLen

  # Rows in the half-open window (rangeStart, rangeEnd].
  dfp <- df[(ts > rangeStart) & (ts <= rangeEnd), ]

  t_end[i] <- rangeEnd

  # For two vectors "pairwise.complete.obs" yields the same value as
  # "complete.obs", but returns NA instead of raising an error when the
  # window contains no complete (non-missing) pair.
  corr_ts[i] <- cor(
    as.numeric(dfp[, ts]), dfp[, target],
    use = "pairwise.complete.obs"
  )

  corr_close[i] <- cor(
    dfp[, close], dfp[, target],
    use = "pairwise.complete.obs"
  )

  corr_regr_slope[i] <- cor(
    dfp[, regr_slope], dfp[, target],
    use = "pairwise.complete.obs"
  )

  pb$tick()
}

Debug: plots of the data

# Debug view of a single window: `rangeStart` and `rangeEnd` are the values
# left behind by the final iteration of the bootstrap loop.
# The filter uses the same half-open interval (rangeStart, rangeEnd] as the
# loop, so the row at `rangeEnd` itself is plotted and the panels show exactly
# the rows that fed the last set of correlations.
p1 <-
  df[(ts > rangeStart) & (ts <= rangeEnd)] |>
  melt(id.vars = "ts", measure.vars = c("close", "regr_slope", "target")) |>
  ggplot(aes(ts, value)) +
  geom_line() +
  # One panel per variable, each with its own axis scales.
  facet_wrap(~variable, scales = "free", ncol = 1)

ggplotly(p1)
# 2-D binned counts of target against the rolling regression slope, over the
# full data set.
p2 <- df |>
  ggplot(aes(regr_slope, target)) +
  geom_bin_2d()

ggplotly(p2)
## Warning: Removed 305 rows containing non-finite values (stat_bin2d).

Correlation violins

# Gather the bootstrap results into one table: one row per sampled window.
dfCorrs <- data.table(
  t_end           = t_end,
  corr_ts         = corr_ts,
  corr_close      = corr_close,
  corr_regr_slope = corr_regr_slope
)

# Long format (one row per window and correlation type), drawn as one violin
# per correlation type with a reference line at zero.
ggplot(
  melt(dfCorrs, measure.vars = c("corr_ts", "corr_close", "corr_regr_slope")),
  aes(variable, value)
) +
  geom_hline(yintercept = 0) +
  geom_violin()